Take Home Exercise 1

Published

November 30, 2023

Modified

December 12, 2023

Getting Started

pacman::p_load(sf, tmap, sfdep, tidyverse)

Geospatial Data

bus stop geospatial data in singapore

busStops <- st_read(dsn = "data/geospatial",
                 layer = "BusStop") %>%
  st_transform(crs = 3414)
Reading layer `BusStop' from data source 
  `/Users/michaelberlian/Desktop/SMU Nov/ISSS624 GeoSpatial/ISSS624/Take-Home_Ex/Take-Home_Ex1/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21

Plotting Bus Stop points

tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(busStops) +
  tm_dots()

Hexagonal Grid Creation

creating our own hexagonal grid

grid = st_make_grid(busStops, c(500), what = "polygons", square = FALSE)
# To sf and add grid ID
grid_sf = st_sf(grid) %>%
  # add grid ID
  mutate(grid_id = 1:length(lengths(grid)))
grid_sf$n_colli = lengths(st_intersects(grid_sf, busStops))

# remove grid without value of 0 (i.e. no points in side that grid)
grid_count = filter(grid_sf,n_colli > 0 )
tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(grid_count) +
  tm_fill(
    col = "n_colli",
    palette = "Blues",
    style = "cont",
    title = "Number of collisions",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of collisions: " = "n_colli"
    ),
    popup.format = list(
      n_colli = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.7)

removing bus stop grids outside of singapore

grid_count_rm <- grid_count %>%
  filter(!grid_id == 1767,
         !grid_id == 2073,
         !grid_id == 2135,
         !grid_id == 2104)

Aspatial Data

importing the bus trip data of August 2023.

un-comment the one of other 2 lines and comment the first line using (cmd/ctrl+shift+c) to switch the data set into the other months.

busTrips <- read_csv("data/aspatial/origin_destination_bus_202308.csv")
Rows: 5709512 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): YEAR_MONTH, DAY_TYPE, PT_TYPE, ORIGIN_PT_CODE, DESTINATION_PT_CODE
dbl (2): TIME_PER_HOUR, TOTAL_TRIPS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202309.csv")
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202310.csv")

busTrips$ORIGIN_PT_CODE <- as.factor(busTrips$ORIGIN_PT_CODE)
busTrips$DESTINATION_PT_CODE <- as.factor(busTrips$DESTINATION_PT_CODE)

Data wrangling

Aspatial Data Wrangling

calculating bus trip in weekday and morning peak

busTripsDayMorning <- busTrips %>%
  filter(DAY_TYPE == "WEEKDAY", 
         TIME_PER_HOUR >= 6, 
         TIME_PER_HOUR <= 9) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(WeekdayMorningTrips = sum(TOTAL_TRIPS))

calculating bus trip in weekday and afternoon peak

busTripsDayAfternoon <- busTrips %>%
  filter(DAY_TYPE == "WEEKDAY", 
         TIME_PER_HOUR >= 17, 
         TIME_PER_HOUR <= 20) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(WeekdayAfternoonTrips = sum(TOTAL_TRIPS))

calculating bus trip in weekend and morning peak

busTripsEndMorning <- busTrips %>%
  filter(DAY_TYPE == "WEEKENDS/HOLIDAY", 
         TIME_PER_HOUR >= 11, 
         TIME_PER_HOUR <= 14) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(WeekendMorningTrips = sum(TOTAL_TRIPS))

calculating bus trip in weekend and evening peak

busTripsEndEvening <- busTrips %>%
  filter(DAY_TYPE == "WEEKENDS/HOLIDAY", 
         TIME_PER_HOUR >= 16, 
         TIME_PER_HOUR <= 19) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(WeekendEveningTrips = sum(TOTAL_TRIPS))

Combining the Peak Trips

BusTrips_comb <- busTripsDayMorning %>%
  left_join(busTripsDayAfternoon) %>%
  left_join(busTripsEndMorning) %>%
  left_join(busTripsEndEvening)
Joining with `by = join_by(ORIGIN_PT_CODE)`
Joining with `by = join_by(ORIGIN_PT_CODE)`
Joining with `by = join_by(ORIGIN_PT_CODE)`

Geospatial Data Wrangling

connecting the bus stop and the hexagonal grids

grid_bus <- st_join(grid_count_rm,busStops,join = st_contains) %>%
  unnest() %>%
  select(grid_id,BUS_STOP_N)
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(grid)`.
grid_bus$BUS_STOP_N <- as.factor(grid_bus$BUS_STOP_N)

Joining aspatial and geospatial

combine the number of trip data and the geospatial data.

transforming the data based on the hexagonal grid id

Trips <- left_join(BusTrips_comb,grid_bus,
                   by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  group_by(grid_id)%>%
  summarise(WeekdayMorningTrips = sum(WeekdayMorningTrips),
            WeekdayAfternoonTrips = sum(WeekdayAfternoonTrips),
            WeekendMorningTrips = sum(WeekendMorningTrips),
            WeekendEveningTrips = sum(WeekendEveningTrips))
Trips <- left_join(grid_count_rm,Trips) %>%
  mutate (Total_Trips = WeekdayMorningTrips+WeekdayAfternoonTrips+WeekendMorningTrips  +WeekendEveningTrips) %>% 
  rename (n_bus = n_colli)
Joining with `by = join_by(grid_id)`

Trip per bus stop, to see if the trip amount is caused by number of bus station or is it really packed

TripsPerBusStop <- Trips %>%
  mutate (WeekdayMorningTrips = WeekdayMorningTrips/n_bus,
          WeekdayAfternoonTrips = WeekdayAfternoonTrips/n_bus,
          WeekendMorningTrips = WeekendMorningTrips/n_bus,
          WeekendEveningTrips = WeekendEveningTrips/n_bus)

Using log to see if there is skewness

TripsLog <- Trips %>%
  mutate (WeekdayMorningTrips = log(WeekdayMorningTrips),
          WeekdayAfternoonTrips = log(WeekdayAfternoonTrips),
          WeekendMorningTrips = log(WeekendMorningTrips),
          WeekendEveningTrips = log(WeekendEveningTrips))

Visualising

initial look of the overall number of trips in the peak times

tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(Trips) +
  tm_fill(
    col = "Total_Trips",
    palette = "Blues",
    style = "quantile",
    title = "Total Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.format = list(
      Total_Trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.7)
Warning: popup.vars not specified whereas popup.format is a list

The plot above are from total bus trip per origin bus station.

From the plot of total trips above, can be seen that bus trips are spreaded around across the country. However, there are several clusters can be seen.

Total Trips

plotting the trips of each peak hours

weekday_morning <- tm_shape(Trips) +
  tm_fill(
    col = "WeekdayMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekday_afternoon <- tm_shape(Trips) +
  tm_fill(
    col = "WeekdayAfternoonTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Afternoon Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_morning <- tm_shape(Trips) +
  tm_fill(
    col = "WeekendMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_evening <- tm_shape(Trips) +
  tm_fill(
    col = "WeekendEveningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Evening Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
tmap_mode("plot")
tmap mode set to plotting
tmap_arrange(weekday_morning, weekday_afternoon, weekend_morning, weekend_evening,
            ncol=2, nrow=2)

The plot above are segragated trips divided into 4 categories: Weekday morning, Weekday afternoon, Weekend morning, and weekend evening. There are only minimal differences between them. However a noticable different is on the east side. The east side quantile is lower on the evening or afternoon than in the morning both on weekend and weekday. Also, in the central can be seen that they have a higher quantile during the afternoon and evening.

Total Trips per Bus Stop

plotting the number of trips per peak hours per number of bus station

weekday_morning_per_stop <- tm_shape(TripsPerBusStop) +
  tm_fill(
    col = "WeekdayMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekday_afternoon_per_stop <- tm_shape(TripsPerBusStop) +
  tm_fill(
    col = "WeekdayAfternoonTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Afternoon Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_morning_per_stop <- tm_shape(TripsPerBusStop) +
  tm_fill(
    col = "WeekendMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_evening_per_stop <- tm_shape(TripsPerBusStop) +
  tm_fill(
    col = "WeekendEveningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Evening Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
tmap_mode("plot")
tmap mode set to plotting
tmap_arrange(weekday_morning_per_stop, weekday_afternoon_per_stop,
             weekend_morning_per_stop, weekend_evening_per_stop,
            ncol=2, nrow=2) 

the plot above shows the differences of total number of trips based of the time of day and the day. The number of total trip in a hexagon are divided per each number of bus station in the hexagon. However, the result are not change by a lot as well.

Log value of trips

plotting the log number of total trips for each peak hour times to see the skewness.

weekday_morning_log <- tm_shape(TripsLog) +
  tm_fill(
    col = "WeekdayMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekday_afternoon_log <- tm_shape(TripsLog) +
  tm_fill(
    col = "WeekdayAfternoonTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Afternoon Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_morning_log <- tm_shape(TripsLog) +
  tm_fill(
    col = "WeekendMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_evening_log <- tm_shape(TripsLog) +
  tm_fill(
    col = "WeekendEveningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Evening Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
tmap_mode("plot")
tmap mode set to plotting
tmap_arrange(weekday_morning_log, weekday_afternoon_log,
             weekend_morning_log, weekend_evening_log,
            ncol=2, nrow=2)

The above plot shows the value of total number of trip per hexagon after applying log to the number. There are also minimal differences between them and previous plots.

Local Indicators of Spatial Association (LISA) Analysis

perfoming LISA analysis to see the correlation between hexagonal grids

Spatial Weight

using the inverse distance based weight to see the correlation between the hexagons.

wm_idw <- Trips %>%
  mutate(nb = st_dist_band(grid),
         wts = st_inverse_distance(nb, grid,
                                   scale = 1,
                                   alpha = 1),
         .before = 1) %>%
  mutate(grid_id = 1:length(Trips$grid_id))
! Polygon provided. Using point on surface.
! Polygon provided. Using point on surface.

Local Moran

calculating the local moran of every peak hours trips

Weekday Morning

lisa_DM <- wm_idw %>% 
  mutate(local_moran = local_moran(
    WeekdayMorningTrips, nb, wts, nsim = 99, na.action=na.pass),
         .before = 1) %>%
  unnest(local_moran)
Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekdayMorningTrips, nb, wts, nsim =
  99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran_DM <- tm_shape(lisa_DM) +
  tm_fill("ii") + 
  tm_borders(alpha = 0.5) +
  tm_view(set.zoom.limits = c(6,8)) +
  tm_layout(main.title = "local Moran's I of Trip",
            main.title.size = 0.8)
p_val_moran_DM <- tm_shape(lisa_DM) +
  tm_fill("p_ii_sim") + 
  tm_borders(alpha = 0.5) +
   tm_layout(main.title = "p-value of local Moran's I",
            main.title.size = 0.8)
tmap_arrange(ii_val_moran_DM, p_val_moran_DM, ncol = 2)
Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig_DM <- lisa_DM  %>%
  filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)

weekday_morning_moran <- tm_shape(lisa_DM) +
  tm_polygons() +
  tm_borders(alpha = 0.5) +
tm_shape(lisa_sig_DM) +
  tm_fill("mean", palette = pal , title = "Weekday Morning Trips") + 
  tm_borders(alpha = 0.4) +
  tm_legend(scale = 0.5)

Weekday Afternoon

lisa_DA <- wm_idw %>% 
  mutate(local_moran = local_moran(
    WeekdayAfternoonTrips, nb, wts, nsim = 99, na.action=na.pass),
         .before = 1) %>%
  unnest(local_moran)
Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekdayAfternoonTrips, nb, wts, nsim
  = 99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran_DA <- tm_shape(lisa_DA) +
  tm_fill("ii") + 
  tm_borders(alpha = 0.5) +
  tm_view(set.zoom.limits = c(6,8)) +
  tm_layout(main.title = "local Moran's I of Trip",
            main.title.size = 0.8)
p_val_moran_DA <- tm_shape(lisa_DA) +
  tm_fill("p_ii_sim") + 
  tm_borders(alpha = 0.5) +
   tm_layout(main.title = "p-value of local Moran's I",
            main.title.size = 0.8)
tmap_arrange(ii_val_moran_DA, p_val_moran_DA, ncol = 2)
Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig_DA <- lisa_DA  %>%
  filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)

weekday_afternoon_moran <- tm_shape(lisa_DA) +
  tm_polygons() +
  tm_borders(alpha = 0.5) +
tm_shape(lisa_sig_DA) +
  tm_fill("mean", palette = pal , title = "Weekday Afternoon Trips") + 
  tm_borders(alpha = 0.4) +
  tm_legend(scale = 0.5)

Weekend Morning

lisa_EM <- wm_idw %>% 
  mutate(local_moran = local_moran(
    WeekendMorningTrips, nb, wts, nsim = 99, na.action=na.pass),
         .before = 1) %>%
  unnest(local_moran)
Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekendMorningTrips, nb, wts, nsim =
  99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran_EM <- tm_shape(lisa_EM) +
  tm_fill("ii") + 
  tm_borders(alpha = 0.5) +
  tm_view(set.zoom.limits = c(6,8)) +
  tm_layout(main.title = "local Moran's I of Trip",
            main.title.size = 0.8)
p_val_moran_EM <- tm_shape(lisa_EM) +
  tm_fill("p_ii_sim") + 
  tm_borders(alpha = 0.5) +
   tm_layout(main.title = "p-value of local Moran's I",
            main.title.size = 0.8)
tmap_arrange(ii_val_moran_EM, p_val_moran_EM, ncol = 2)
Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig_EM <- lisa_EM  %>%
  filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)

weekend_morning_moran <- tm_shape(lisa_EM) +
  tm_polygons() +
  tm_borders(alpha = 0.5) +
tm_shape(lisa_sig_EM) +
  tm_fill("mean", palette = pal , title = "Weekend Morning Trips" ) + 
  tm_borders(alpha = 0.4) +
  tm_legend(scale = 0.5)

Weekend Evening

lisa_EE <- wm_idw %>% 
  mutate(local_moran = local_moran(
    WeekendEveningTrips, nb, wts, nsim = 99, na.action=na.pass),
         .before = 1) %>%
  unnest(local_moran)
Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekendEveningTrips, nb, wts, nsim =
  99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran_EE <- tm_shape(lisa_EE) +
  tm_fill("ii") + 
  tm_borders(alpha = 0.5) +
  tm_view(set.zoom.limits = c(6,8)) +
  tm_layout(main.title = "local Moran's I of Trip",
            main.title.size = 0.8)
p_val_moran_EE <- tm_shape(lisa_EE) +
  tm_fill("p_ii_sim") + 
  tm_borders(alpha = 0.5) +
   tm_layout(main.title = "p-value of local Moran's I",
            main.title.size = 0.8)
tmap_arrange(ii_val_moran_EE, p_val_moran_EE, ncol = 2)
Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig_EE <- lisa_EE  %>%
  filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)

weekend_evening_moran <- tm_shape(lisa_EE) +
  tm_polygons() +
  tm_borders(alpha = 0.5) +
tm_shape(lisa_sig_EE) +
  tm_fill("mean", palette = pal , title = "Weekend Evening Trips") + 
  tm_borders(alpha = 0.4) +
  tm_legend(scale = 0.5)

LISA MAP

plotting the LISA map

tmap_arrange(weekday_morning_moran, weekday_afternoon_moran,
             weekend_morning_moran, weekend_evening_moran,
             ncol = 2, nrow = 2)
Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).

Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).

Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).

Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).

Conclusion

The graph above shows the local moran association between each hexagonal grids using the inverse distance weight.

The difference between week and and weekday is that on the weekend there are some similar clustering of low-low and high-high spread across the country, however on weekday most of it are only either high-low and low-high mean dispersion.

Can also be seen on the top 2 graph for weekday, on morning the dispersion happens in outer Singapore or suburban area, on the other hand on the afternoon the dispersion happen more toward the center. This is following the fact that early in the morning people go to work or school and go home in the after noon.

On the weekend graphs can be seen that most of the dispersion and clustering happened in the central area in the morning and a bit spread out around during the night.